In [351]:
# Input text file that pandas.read_csv loads further down.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
file = '/Users/schriste/Downloads/AC_H1_EPM_201008.txt'
In [352]:
import pandas
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline
In [353]:
# Names given to the four value columns that follow the date and hour fields.
col_names = [
    'FP6P_.761-1.22MEV_IONS',
    'FP7P_1.22-4.97MEV_IONS',
    'UNC_FP6P_.761-1.22MEV_IONS',
    'UNC_FP7P_1.22-4.97MEV_IONS',
]
names = ['date', 'hour'] + col_names
# Skip the first 74 lines of the file and split fields on runs of whitespace.
# sep=r'\s+' is the documented equivalent of delim_whitespace=True, which
# newer pandas releases deprecate.
data = pandas.read_csv(file, skiprows=74, sep=r'\s+', names=names)
In [354]:
# Display the freshly loaded frame for inspection.
data
Out[354]:
the last few lines in the file need to be removed
In [355]:
# truncate() cuts by index label and keeps the `after` label itself, so every
# row labelled above len(data) - 5 is discarded.
# NOTE(review): re-running this cell removes further rows each time.
last_label = len(data) - 5
data = data.truncate(after=last_label)
now create the time indices
In [356]:
# Join the date and hour fields into one string per row, drop the last four
# characters of each, and parse what is left as day-month-year H:M:S.
timestamp_strings = data['date'] + ' ' + data['hour']
times = [
    datetime.strptime(stamp[:-4], '%d-%m-%Y %H:%M:%S')
    for stamp in timestamp_strings
]
add this array to the dataFrame and set it as the index
In [357]:
# Attach the parsed timestamps as a 'times' column and promote it to the index.
data = data.assign(times=times).set_index('times')
Now drop the columns that are no longer needed.
In [358]:
# The date and hour fields are already encoded in the 'times' index, so remove
# both columns in a single call. axis is passed by keyword because recent
# pandas versions no longer accept it as a positional argument.
data = data.drop(['hour', 'date'], axis=1)
For some reason the data is not being parsed properly as floats.
In [359]:
# Inspect the dtype pandas assigned to each remaining column.
data.dtypes
Out[359]:
convert those to floats and replace them
In [360]:
# Coerce every value column to a numeric dtype; entries that cannot be parsed
# become NaN. pandas.to_numeric replaces Series.convert_objects, which has been
# removed from pandas.
for col in col_names:
    data[col] = pandas.to_numeric(data[col], errors='coerce')
In [361]:
# Re-check the column dtypes after the numeric conversion.
data.dtypes
Out[361]:
In [362]:
# Quick look at a single column, drawn on its own figure and axes.
fig, ax = plt.subplots()
data['UNC_FP7P_1.22-4.97MEV_IONS'].plot(ax=ax)
plt.show()
Now we need to remove the bad values.
In [363]:
# Smallest value currently present in this column.
data['UNC_FP7P_1.22-4.97MEV_IONS'].min()
Out[363]:
In [364]:
# Replace negative entries with NaN in every value column. Series.mask returns
# a new column with NaN wherever the condition holds; assigning it back avoids
# the chained assignment data[col][...] = ..., which may write to a temporary
# copy and leave `data` unchanged.
for col in col_names:
    data[col] = data[col].mask(data[col] < 0)
In [365]:
# Minimum of the first value column, checked after the negative entries were replaced.
data[col_names[0]].min()
Out[365]:
In [372]:
# One panel per column. DataFrame.plot builds its own figure when subplots=True
# and figsize are given, so no separate plt.figure() call is made here; it
# would only leave an empty figure behind.
data.plot(subplots=True, figsize=(10, 10))
plt.show()
In [366]: